Arithmetic


  • Addition: 3 + 2
  • Subtraction: 3 - 2
  • Multiplication: 3 * 2
  • Division: 3 / 2
  • Exponentiation: 3 ^ 2
  • Modulo (remainder): 3 %% 2

Data Types


# Determine type

class(1)
## [1] "numeric"
class("1")
## [1] "character"
class(TRUE)
## [1] "logical"
class(NULL)
## [1] "NULL"

Factors


speed_vector <- c("medium", "slow", "slow", "medium", "fast")

factor_speed_vector <- factor(speed_vector,ordered=TRUE,levels=c("slow","medium","fast"))

factor_speed_vector
## [1] medium slow   slow   medium fast  
## Levels: slow < medium < fast
summary(factor_speed_vector)
##   slow medium   fast 
##      2      2      1

Vectors


  • One-dimensional array
  • The elements in a vector all have the same data type

Creating

# Creating

eg_vector <- c(1, 2, 3)

# Naming

names(eg_vector) <- c("item1", "item2", "item3")

eg_vector
## item1 item2 item3 
##     1     2     3

Operations

- Adding Vectors

c(1, 2, 3) + c(4, 5, 6)

c(1 + 4, 2 + 5, 3 + 6)

c(5, 7, 9)

A_vector <- c(1, 2, 3)
B_vector <- c(4, 5, 6)
A_vector + B_vector
## [1] 5 7 9
sum(A_vector)
## [1] 6

- Evaluating

  • < for less than
  • > for greater than
  • <= for less than or equal to
  • >= for greater than or equal to
  • == for equal to each other

  • ! for not (e.g. !=)
  • & for and
  • | for or

A_vector > 2
## [1] FALSE FALSE  TRUE
A_vector[A_vector > 2]
## [1] 3

Indexing

In R, indexing starts at 1

eg_vector[1]
## item1 
##     1
eg_vector[c(1,3)]
## item1 item3 
##     1     3
eg_vector[1:3]
## item1 item2 item3 
##     1     2     3
eg_vector["item1"]
## item1 
##     1

Matrices


  • Two-dimensional array
  • The elements in a matrix all have the same data type

Creating

# Creating

matrix(1:9,byrow=TRUE,nrow=3)
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9
matrix(1:9,byrow=FALSE,nrow=3)
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
# Naming 

eg_matrix <- matrix(1:9,byrow=TRUE,nrow=3)

rownames(eg_matrix) <- c("MA", "NY", "CO")
colnames(eg_matrix) <- c("2010", "2011", "2013")

eg_matrix
##    2010 2011 2013
## MA    1    2    3
## NY    4    5    6
## CO    7    8    9

Operations

rowSums(eg_matrix)
## MA NY CO 
##  6 15 24
colSums(eg_matrix)
## 2010 2011 2013 
##   12   15   18
matrix_a <- matrix(1:9,byrow=TRUE,nrow=3)

matrix_b <- matrix(1:9,byrow=FALSE,nrow=3)

matrix_a + matrix_b
##      [,1] [,2] [,3]
## [1,]    2    6   10
## [2,]    6   10   14
## [3,]   10   14   18

Editing

cbind(eg_matrix, eg_vector)
##    2010 2011 2013 eg_vector
## MA    1    2    3         1
## NY    4    5    6         2
## CO    7    8    9         3
rbind(eg_matrix, eg_vector)
##           2010 2011 2013
## MA           1    2    3
## NY           4    5    6
## CO           7    8    9
## eg_vector    1    2    3

Indexing

eg_matrix[1,2]
## [1] 2
eg_matrix[1:2,2:3]
##    2011 2013
## MA    2    3
## NY    5    6
eg_matrix[c(1,3),2:3]
##    2011 2013
## MA    2    3
## CO    8    9
eg_matrix[,1]
## MA NY CO 
##  1  4  7
eg_matrix[1,]
## 2010 2011 2013 
##    1    2    3

Dataframes


  • Two-dimensional objects
  • Within a column all elements have the same data type, but different columns can be of different data type

Creating

name <- c("Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune")
type <- c("Terrestrial", "Terrestrial", "Terrestrial", "Terrestrial", "Gas giant", "Gas giant", "Gas giant", "Gas giant")
diameter <- c(0.382, 0.949, 1, 0.532, 11.209, 9.449, 4.007, 3.883)
rotation <- c(58.64, -243.02, 1, 1.03, 0.41, 0.43, -0.72, 0.67)
rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)

planets_df <- data.frame(name, type, diameter, rotation, rings)

planets_df
name type diameter rotation rings
Mercury Terrestrial 0.382 58.64 FALSE
Venus Terrestrial 0.949 -243.02 FALSE
Earth Terrestrial 1.000 1.00 FALSE
Mars Terrestrial 0.532 1.03 FALSE
Jupiter Gas giant 11.209 0.41 TRUE
Saturn Gas giant 9.449 0.43 TRUE
Uranus Gas giant 4.007 -0.72 TRUE
Neptune Gas giant 3.883 0.67 TRUE

Exploring

head(planets_df)
name type diameter rotation rings
Mercury Terrestrial 0.382 58.64 FALSE
Venus Terrestrial 0.949 -243.02 FALSE
Earth Terrestrial 1.000 1.00 FALSE
Mars Terrestrial 0.532 1.03 FALSE
Jupiter Gas giant 11.209 0.41 TRUE
Saturn Gas giant 9.449 0.43 TRUE
tail(planets_df, 2)
name type diameter rotation rings
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE
str(planets_df)
## 'data.frame':    8 obs. of  5 variables:
##  $ name    : Factor w/ 8 levels "Earth","Jupiter",..: 4 8 1 3 2 6 7 5
##  $ type    : Factor w/ 2 levels "Gas giant","Terrestrial": 2 2 2 2 1 1 1 1
##  $ diameter: num  0.382 0.949 1 0.532 11.209 ...
##  $ rotation: num  58.64 -243.02 1 1.03 0.41 ...
##  $ rings   : logi  FALSE FALSE FALSE FALSE TRUE TRUE ...

Indexing

planets_df[1,2]
## [1] Terrestrial
## Levels: Gas giant Terrestrial
planets_df[1:2,2:3]
type diameter
Terrestrial 0.382
Terrestrial 0.949
planets_df[c(1,3),2:3]
type diameter
1 Terrestrial 0.382
3 Terrestrial 1.000
planets_df[,1:3]
name type diameter
Mercury Terrestrial 0.382
Venus Terrestrial 0.949
Earth Terrestrial 1.000
Mars Terrestrial 0.532
Jupiter Gas giant 11.209
Saturn Gas giant 9.449
Uranus Gas giant 4.007
Neptune Gas giant 3.883
planets_df[1,]
name type diameter rotation rings
Mercury Terrestrial 0.382 58.64 FALSE
planets_df[1:3,"diameter"]
## [1] 0.382 0.949 1.000
planets_df$rings
## [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
planets_df[planets_df$rings==TRUE,]
name type diameter rotation rings
5 Jupiter Gas giant 11.209 0.41 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE

Operations

planets_df[order(planets_df$diameter),"name"]
## [1] Mercury Mars    Venus   Earth   Neptune Uranus  Saturn  Jupiter
## Levels: Earth Jupiter Mars Mercury Neptune Saturn Uranus Venus
planets_df[order(planets_df$diameter,decreasing=TRUE),"name"]
## [1] Jupiter Saturn  Uranus  Neptune Earth   Venus   Mars    Mercury
## Levels: Earth Jupiter Mars Mercury Neptune Saturn Uranus Venus

Lists


  • One-dimensional array
  • The elements in a list do not have to be related in any way

Creating

# Creating 

list(A_vector, B_vector)
## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] 4 5 6
# Naming 

list("A" = A_vector,
     "B" = B_vector)
## $A
## [1] 1 2 3
## 
## $B
## [1] 4 5 6
eg_list <- list(A_vector, B_vector)

names(eg_list) <- c("A", "B")

Editing

c(eg_list, eg_list)
## $A
## [1] 1 2 3
## 
## $B
## [1] 4 5 6
## 
## $A
## [1] 1 2 3
## 
## $B
## [1] 4 5 6

Indexing

eg_list[[1]]
## [1] 1 2 3
eg_list[["A"]]
## [1] 1 2 3
eg_list$A
## [1] 1 2 3
eg_list[[1]][1]
## [1] 1

If Statement


if (condition) {
  expression
}

if (condition) {
  expression_1
} else {
  expression_2
}

if (condition_1) {
  expression_1
} else if (condition_2) {
  expression_2
} else {
  expression_3
}

Loops


While

while (condition) {
  expression
}

i <- 1
while (i <= 10) {
  expression
  
  i <- i + 1
}

For

for (item in list) {
  expression
}

for (i in seq_along(list)) {
  expression on list[[i]]
}

output <- vector("double", length(list))
for (i in seq_along(list)) {
  output[[i]] <- expression on list[[i]]
}

Functions


Existing

install.packages("package")
library(package)
  • abs(): Calculate the absolute value.
  • sum(): Calculate the sum of all the values in a data structure.
  • mean(): Calculate the arithmetic mean.
  • round(): Round the values to 0 decimal places by default.

  • seq(): Generate sequences, by specifying the from, to, and by arguments.
  • rep(): Replicate elements of vectors and lists.
  • sort(): Sort a vector in ascending order. Works on numerics, but also on character strings and logicals.
  • rev(): Reverse the elements in a data structure for which reversal is defined.
  • str(): Display the structure of any R object.
  • append(): Merge vectors or lists.
  • is.*(): Check for the class of an R object.
  • as.*(): Convert an R object from one class to another.
  • unlist(): Flatten (possibly nested) lists to produce a vector.


Creating

function_name <- function (argument_1, argument_2) {
  expression
}

function_name <- function (argument_1, argument_2 = default_value) {
  expression
}

function_name <- function(argument_1, ...) {
  expression
}

Applying

  • lapply returns a list
  • sapply tries to simplify the result to a vector or matrix, and returns a list when it cannot
  • vapply is like sapply, but the expected type and length of each result are given explicitly as a template, e.g. numeric(4)
lapply(vector, existing_function)
sapply(vector, existing_function)
vapply(vector, existing_function, numeric(4))

lapply(vector, existing_function, other_arguments)

lapply(vector, function(argument){new_function expression})

Mapping

map(.x, .f, ...) maps function .f over data .x using ... additional arguments

  • If .x is a dataframe, .f is mapped over each column
  • If .x is a list or vector, .f is mapped over each item

map() returns a list
map_dbl() returns a double vector
map_lgl() returns a logical vector
map_int() returns an integer vector
map_chr() returns a character vector

map(.x, function(x) something) maps a function on the fly

Regular Expressions


Apply

  • grepl() returns TRUE when a pattern is found in the corresponding character string.
  • grep() returns a vector of indices of the character strings that contain the pattern.
grep(regular_expression, search_location)
  • sub() replaces the first match
  • gsub() replaces all matches
gsub(regular_expression, replace_value, search_location)

Dates and Times


Now

Sys.Date()
## [1] "2019-01-10"
Sys.time()
## [1] "2019-01-10 18:11:45 EST"

Setting

  • %Y: 4-digit year (1982)
  • %y: 2-digit year (82)
  • %m: 2-digit month (01)
  • %d: 2-digit day of the month (13)
  • %A: weekday (Wednesday)
  • %a: abbreviated weekday (Wed)
  • %B: month (January)
  • %b: abbreviated month (Jan)
as.Date("1982-01-13")
## [1] "1982-01-13"
as.Date("Jan-13-82", format = "%b-%d-%y")
## [1] "1982-01-13"
as.Date("13 January, 1982", format = "%d %B, %Y")
## [1] "1982-01-13"
  • %H: hours as a decimal number (00-23)
  • %I: hours as a decimal number (01-12)
  • %M: minutes as a decimal number
  • %S: seconds as a decimal number
  • %T: shorthand notation for the typical format %H:%M:%S
  • %p: AM/PM indicator
as.POSIXct("2012-5-12 14:23:08")
## [1] "2012-05-12 14:23:08 EDT"
as.POSIXct("May 12, '12 hours:14 minutes:23 seconds:08", 
           format = "%B %d, '%y hours:%H minutes:%M seconds:%S")
## [1] "2012-05-12 14:23:08 EDT"

Reformatting

Sys.Date()
## [1] "2019-01-10"
format(Sys.Date(), format = "%d %B, %Y")
## [1] "10 January, 2019"
format(Sys.Date(), format = "Today is a %A!")
## [1] "Today is a Thursday!"